http://arxiv.org/pdf/1402.1869v2.pdf

"A linear region of a piecewise linear function $F : R^{n_0} \to R^m$ is a maximal connected subset of the input-space $R^{n_0}$, on which $F$ is linear. For the functions that we consider, each linear region has full dimension, $n_0$."


In [1]:
import numpy as np
import numpy.random as npr
import pylab as pl
import seaborn as sns
pl.rcParams['font.family']='Serif'
%matplotlib inline

In [2]:
def F(x):
    if x < 0:
        return 0
    elif x < 5:
        return 2*x
    else:
        return 10

In [201]:
x = np.arange(-10,10,0.01)
y = np.array([F(x) for x in x])

pl.subplot(3,1,1)
pl.title('Raw')
pl.plot(x,y)
pl.xlim(-3,8)
pl.ylim(-1,11)
pl.ylabel(r'$y$')

pl.subplot(3,1,2)
pl.title('First derivative')
deriv = np.diff(y,1,axis=0)
pl.plot(x[1:],deriv);
pl.xlim(-3,8)
pl.ylim(np.min(deriv)-0.01,np.max(deriv)+0.01)
pl.ylabel(r'$dy/dx$')

pl.subplot(3,1,3)
pl.title('Second derivative')
deriv2 = np.diff(y,2,axis=0)
pl.plot(x[2:],deriv2);
pl.xlim(-3,8)
pl.ylim(np.min(deriv2)-0.01,np.max(deriv2)+0.01)
pl.ylabel(r'$d^2y/dx^2$')

pl.tight_layout()

pl.xlabel(r'$x$')


Out[201]:
<matplotlib.text.Text at 0x145004240>

In [11]:
relu = lambda x: x*(x>0)
sigmoid = lambda x : 1.0 / (1.0 + np.exp(-x))

class FeedforwardNet():
    def __init__(self,layer_dims=[32,16,8,4,2,4,8,16,32],
                 activation=relu):
        self.activation=activation
        self.layer_dims = layer_dims
        self.W = []
        self.b = []
        for i in range(1,len(layer_dims)):
            self.W.append(npr.randn(layer_dims[i-1],layer_dims[i]))
            self.b.append(npr.randn())
        self.num_params = sum([np.prod(w.shape) for w in self.W]) + len(self.b)
        self.bottleneck = np.argmin(np.array(layer_dims))
        
    def mats_to_vec(self,W,b):
        w_vecs = []
        for w in W:
            w_vecs.append(np.reshape(w,np.prod(w.shape)))
        w_vecs.append(np.array(b))
        return np.hstack(w_vecs)
    
    def vec_to_mats(self,w_vecs):
        ind = 0
        W = []
        
        for i in range(len(self.W)):
            size = np.prod(self.W[i].shape)
            W.append(np.reshape(w_vecs[ind:ind+size],self.W[i].shape))
            ind += size
        bias = w_vecs[ind:]
        return W,bias

    def transform(self,X,end_layer=None):
        if end_layer==None:
            end_layer = len(self.layer_dims)-1
        def transform_one(x):
            L = x
            for i in range(end_layer):
                L = self.activation(L.dot(self.W[i])+self.b[i])
            return L
        
        if len(X.shape) > 1:
            y = np.zeros((len(X),self.layer_dims[end_layer]))
            for i,x in enumerate(X):
                y[i] = transform_one(x)
        else:
            y = transform_one(X)
        return y

In [425]:
size=100
inputs = npr.rand(100000,2)*size-(size/2)

In [447]:
layer_sizes = [2,10,10,2]

npr.seed(10)
sigmoid_net = FeedforwardNet(layer_sizes,sigmoid)
outputs = sigmoid_net.transform(inputs)
pl.title('Sigmoid')
pl.scatter(inputs[:,0],inputs[:,1],c=outputs[:,0],linewidths=0,cmap='rainbow')
pl.xlim(-(size/2),size/2)
pl.ylim(-(size/2),size/2)

pl.figure()
pl.scatter(outputs[:,0],outputs[:,1],c=inputs[:,0]*inputs[:,1],linewidths=0,cmap='rainbow')

pl.figure()
npr.seed(10)
relu_net = FeedforwardNet(layer_sizes,relu)
outputs = relu_net.transform(inputs)
pl.title('ReLU')
pl.scatter(inputs[:,0],inputs[:,1],c=outputs[:,0],linewidths=0,cmap='rainbow')
pl.xlim(-(size/2),size/2)
pl.ylim(-(size/2),size/2)

pl.figure()
pl.scatter(outputs[:,0],outputs[:,1],c=inputs[:,0]*inputs[:,1],linewidths=0,cmap='rainbow')


Out[447]:
<matplotlib.collections.PathCollection at 0x1492dfe10>

In [557]:
size=100
inputs = npr.rand(100000,2)*size-(size/2)
layer_sizes = [2,10,10,2]

npr.seed(10)
sigmoid_net = FeedforwardNet(layer_sizes,sigmoid)
outputs = sigmoid_net.transform(inputs)
perturbation_0 = np.zeros(inputs.shape)
perturbation_0[:,0] = np.ones(len(inputs))*dx
outputs_0 = sigmoid_net.transform(inputs + perturbation_0)
perturbation_1 = np.zeros(inputs.shape)
perturbation_1[:,1] = np.ones(len(inputs))*dx
outputs_1 = sigmoid_net.transform(inputs + perturbation_1)

gradient_mag = np.abs(outputs_0[:,0]-outputs[:,0]) + np.abs(outputs_1[:,0]-outputs[:,0])

pl.title('Sigmoid')
c = np.diff(outputs[:,0],1,axis=0)
pl.scatter(inputs[:,0],inputs[:,1],c=gradient_mag,linewidths=0,cmap='rainbow')
pl.xlim(-(size/2),size/2)
pl.ylim(-(size/2),size/2)

pl.figure()
npr.seed(10)
relu_net = FeedforwardNet(layer_sizes,relu)
outputs = relu_net.transform(inputs)
perturbation_0 = np.zeros(inputs.shape)
perturbation_0[:,0] = np.ones(len(inputs))*dx
outputs_0 = relu_net.transform(inputs + perturbation_0)
perturbation_1 = np.zeros(inputs.shape)
perturbation_1[:,1] = np.ones(len(inputs))*dx
outputs_1 = relu_net.transform(inputs + perturbation_1)

gradient_mag = np.abs(outputs_0[:,0]-outputs[:,0]) + np.abs(outputs_1[:,0]-outputs[:,0])

pl.title('ReLU')
pl.scatter(inputs[:,0],inputs[:,1],c=gradient_mag,linewidths=0,cmap='rainbow')
pl.xlim(-(size/2),size/2)
pl.ylim(-(size/2),size/2)

start_point = np.array((0,10))
direction = np.array((1,0))
dx=0.01
lambdas = np.arange(-size,size,dx)
points = np.array([start_point+lam*direction for lam in lambdas])



In [519]:
pl.title('ReLU')
pl.scatter(inputs[:,0],inputs[:,1],c=gradient_mag,linewidths=0,cmap='rainbow')
pl.xlim(-(size/2),size/2)
pl.ylim(-(size/2),size/2)

start_point = np.array((40,0))
direction = np.ones(2)/np.sqrt(2)
#direction[0] *= -1
dx=0.01
lambdas = np.arange(-size,size,dx)
points = np.array([start_point+lam*direction for lam in lambdas])

pl.plot(points[:,0],points[:,1],color='white',linestyle='--')


Out[519]:
[<matplotlib.lines.Line2D at 0x1684699b0>]

In [448]:
def plot_derivs(lambdas,outs):
    pl.plot(lambdas,outs);
    pl.title('Raw output')
    pl.ylabel(r'$y(\lambda)$')
    pl.xlabel(r'$\lambda$')
    pl.figure()
    pl.plot(lambdas[1:],np.diff(outs,1,axis=0));
    pl.title('First derivative')
    pl.ylabel(r'$\frac{\partial y}{\partial \lambda}$')
    pl.xlabel(r'$\lambda$')
    pl.figure()
    pl.plot(lambdas[2:],np.diff(outs,2,axis=0));
    pl.title('Second derivative')
    pl.ylabel(r'$\frac{\partial^2 y}{\partial \lambda^2}$')
    pl.xlabel(r'$\lambda$')

In [520]:
print('ReLU')
outs = relu_net.transform(points)
plot_derivs(lambdas,outs)


ReLU

In [521]:
print('Sigmoid')
outs = sigmoid_net.transform(points)
plot_derivs(lambdas,outs)


Sigmoid

In [368]:
npr.seed(1)
net = FeedforwardNet([32,64,64,64],relu)

In [369]:
# start at a point in input space and project along a random direction

#direction = npr.randn(32)
direction = np.zeros(32)
direction[0] = 1.0
#start_point = npr.randn(32)
start_point = np.ones(32)

size=100
dx=0.01
lambdas = np.arange(-size,size,dx)
points = np.array([start_point+lam*direction for lam in lambdas])

In [370]:
# see what happens to network output
outs = net.transform(points)
outs.shape


Out[370]:
(20000, 64)

In [371]:
np.diff(outs,axis=0).shape


Out[371]:
(19999, 64)

In [372]:
outs.shape,np.min(outs,0).shape


Out[372]:
((20000, 64), (64,))

In [430]:
plot_derivs(lambdas,outs)



In [374]:
n=7
pl.plot(lambdas[n:],np.diff(outs,n,axis=0));
pl.title('Second derivative')
pl.ylabel(r'$\frac{\partial^n y}{\partial \lambda^n}$')
pl.xlabel(r'$\lambda$')


Out[374]:
<matplotlib.text.Text at 0x142e56a20>

In [375]:
n2 = len(outs)-2
pl.scatter(lambdas[2:],(np.sum(np.isclose(np.diff(outs,2,axis=0),0),1)!=outs.shape[1])+npr.randn(n2)*0.03,
           alpha=0.5,linewidths=0)
pl.ylim(0.5,1.5)


Out[375]:
(0.5, 1.5)

In [376]:
discontinuities = lambdas[2:][(np.sum(np.isclose(np.diff(outs,2,axis=0),0),1)!=outs.shape[1])]

In [377]:
sns.kdeplot(discontinuities)
pl.title('Discontinuities around (1,1,1,...,1,1,1)')
pl.xlabel(r'$\lambda$')
pl.ylabel('Density of discontinuities')
pl.xlim(-size,size)


Out[377]:
(-100, 100)

In [365]:
start_point=np.ones(32)
num_discontinuities = []
for i in range(32):
    direction = np.zeros(32)
    direction[i] = 1.0

    size=100
    dx=0.01
    lambdas = np.arange(-size,size,dx)
    points = np.array([start_point+lam*direction for lam in lambdas])
    
    outs = net.transform(points)
    
    discontinuities = lambdas[2:][(np.sum(np.isclose(np.diff(outs,2,axis=0),0),1)!=outs.shape[1])]
    num_discontinuities.append(len(discontinuities))
    sns.kdeplot(discontinuities)
print(np.mean(num_discontinuities),np.std(num_discontinuities))


186.1875 32.3075431401

In [366]:
def discontinuity_density(start_point,
                          num_directions=32,
                          size=100,dx=0.01):
    num_discontinuities = []
    for i in range(num_directions):
        direction = npr.randn(len(start_point))
        direction /= np.linalg.norm(direction)

        lambdas = np.arange(-size,size,dx)
        points = np.array([start_point+lam*direction for lam in lambdas])

        outs = net.transform(points)

        discontinuities = lambdas[2:][(np.sum(np.isclose(np.diff(outs,2,axis=0),0),1)!=outs.shape[1])]
        discont_points = np.array([start_point+lam*direction for lam in discontinuities])
        
        
        num_discontinuities.append(len(discontinuities))
        sns.kdeplot(discontinuities)
        pl.xlim(-size,size)
    print(np.mean(num_discontinuities),np.std(num_discontinuities))

In [378]:
discontinuity_density(np.zeros(32))


315.15625 18.8300115756

In [337]:
discontinuity_density(np.ones(32))


306.0 13.311179512

In [341]:
discontinuity_density(np.ones(32)*3)


277.59375 13.4300488062

In [338]:
discontinuity_density(np.ones(32)*10)


202.5 16.807736314

In [339]:
discontinuity_density(np.ones(32)*100)


36.96875 6.11087337764

In [340]:
discontinuity_density(np.ones(32)*500)


10.4375 2.77192960769

In [472]:
from time import time

In [547]:
# hunting for discontinuities
# pick a random point, pick a random direction, store the locations of all discontinuities encountered


size=50
dx=0.01
all_discontinuities = []
num_points=1000
input_dim=2
initial_points = npr.randn(num_points,input_dim)*10
t = time()
all_points = []

for i in range(num_points):
    initial_point = initial_points[i]
    direction = npr.randn(len(start_point))
    direction /= np.linalg.norm(direction)

    lambdas = np.arange(-size,size,dx)
    points = np.array([initial_point+lam*direction for lam in lambdas])
    all_points.append(points)

    outs = net.transform(points)

    discontinuities = lambdas[2:][(np.sum(np.isclose(np.diff(outs,2,axis=0),0),1)!=outs.shape[1])]
    discont_points = np.array([initial_point+lam*direction for lam in discontinuities])
    all_discontinuities.append(discont_points)
    
    if i % 100 == 0:
        print(i)
        print(time() - t)


0
0.28504109382629395
100
28.416518926620483
200
56.65842390060425
300
84.95096397399902
400
113.33943605422974
500
141.7686219215393
600
170.10501909255981
700
198.24223399162292
800
226.13234496116638
900
254.47574591636658

In [548]:
all_discontinuities = np.vstack(all_discontinuities)
all_points = np.vstack(all_points)

In [549]:
norms = np.array([np.linalg.norm(d) for d in all_discontinuities])

In [550]:
pl.hist(norms,bins=50);



In [555]:
sns.kdeplot(norms,label='discontinuities')
sns.kdeplot(np.array([np.linalg.norm(p) for p in initial_points]),label='input centerpoints')
sns.kdeplot(np.array([np.linalg.norm(p) for p in all_points]),label='all tested points')
pl.title('Norms of discontinuous points')
pl.xlabel(r'$\ell_2$-norm')
pl.ylabel('Probability density')
pl.xlim(0,70)


Out[555]:
(0, 70)

In [620]:
axes = []
ax = sns.kdeplot(all_discontinuities[:,0],label='discontinuities (x)')
axes.append(ax)
ax = sns.kdeplot(all_discontinuities[:,1],label='discontinuities (y)')
axes.append(ax)
ax = sns.kdeplot(initial_points[:,0],label='input centerpoints (x)')
axes.append(ax)
ax = sns.kdeplot(initial_points[:,1],label='input centerpoints (y)')
axes.append(ax)
ax = sns.kdeplot(all_points[:,0],label='all testpoints (x)')
axes.append(ax)
ax = sns.kdeplot(all_points[:,1],label='all testpoints (y)')
axes.append(ax)

pl.title('Absolute coordinate values of discontinuous points')
pl.xlabel('Value')
pl.ylabel('Probability density')
pl.xlim(-70,70)


Out[620]:
(-70, 70)

In [558]:
axes = []
ax = sns.kdeplot(all_discontinuities[:,0],label='discontinuities')
axes.append(ax)
ax = sns.kdeplot(initial_points[:,0],label='input centerpoints')
axes.append(ax)
ax = sns.kdeplot(all_points[:,0],label='all testpoints')
axes.append(ax)

pl.title('Coordinate values of discontinuous points')
pl.xlabel('Value')
pl.ylabel('Probability density')
pl.xlim(-70,70)


Out[558]:
(-70, 70)

In [586]:
l = ax.lines[2]
test_dist = l.get_xydata()

l = ax.lines[0]
res_dist = l.get_xydata()

In [587]:
res_dist.shape


Out[587]:
(100, 2)

In [589]:
np.min(res_dist[:,0]),np.max(res_dist[:,0])


Out[589]:
(-73.081896943452463, 66.682788869329045)

In [590]:
np.min(test_dist[:,0]),np.max(test_dist[:,0])


Out[590]:
(-74.296259502098394, 80.552662803541253)

In [594]:
pl.plot(np.interp(res_dist[:,0],test_dist[:,0],test_dist[:,1])/np.interp(test_dist[:,0],test_dist[:,0],test_dist[:,1]))


Out[594]:
[<matplotlib.lines.Line2D at 0x169a711d0>]

In [588]:
pl.plot(res_dist[:,1]/test_dist[:,1])


Out[588]:
[<matplotlib.lines.Line2D at 0x14d733240>]

In [584]:
quotient = []
for i in range(len(test_dist)):
    if test_dist[i,0]==res_dist[i,0]:
        quotient.append(res_dist[i,1]/test_dist[i,1])
pl.plot(quotient)


Out[584]:
[<matplotlib.lines.Line2D at 0x15698db70>]

In [648]:
# hunting for discontinuities
# pick a random point, pick a random direction, store the locations of all discontinuities encountered

layer_sizes = [32,64,64,64]

npr.seed(10)
net = FeedforwardNet(layer_sizes)

size=5
dx=0.01
all_discontinuities = []
num_points=1000
input_dim=layer_sizes[0]
initial_points = npr.rand(num_points,input_dim)
t = time()
all_points = []

for i in range(num_points):
    initial_point = initial_points[i]
    direction = npr.randn(input_dim)
    direction /= np.linalg.norm(direction)

    lambdas = np.arange(-size,size,dx)
    points = np.array([initial_point+lam*direction for lam in lambdas])
    all_points.append(points)

    outs = net.transform(points)

    discontinuities = lambdas[2:][(np.sum(np.isclose(np.diff(outs,2,axis=0),0),1)!=outs.shape[1])]
    discont_points = np.array([initial_point+lam*direction for lam in discontinuities])
    all_discontinuities.append(discont_points)
    
    if i % 100 == 0:
        print(i)
        print(time() - t)


0
0.03809499740600586
100
2.7550251483917236
200
5.440649032592773
300
8.10377311706543
400
10.853191137313843
500
13.524181127548218
600
16.25196099281311
700
18.93080711364746
800
21.587051153182983
900
24.217501163482666

In [649]:
all_discontinuities = np.vstack(all_discontinuities)
all_points = np.vstack(all_points)
norms = np.array([np.linalg.norm(d) for d in all_discontinuities])

In [650]:
sns.kdeplot(norms,label='discontinuities')
#sns.kdeplot(np.array([np.linalg.norm(p) for p in initial_points]),label='input centerpoints')
sns.kdeplot(np.array([np.linalg.norm(p) for p in all_points]),label='all tested points')
pl.title('Norms of discontinuous points')
pl.xlabel(r'$\ell_2$-norm')
pl.ylabel('Probability density')
#pl.xlim(0,70)


Out[650]:
<matplotlib.text.Text at 0x16bde4a58>

In [633]:
sns.kdeplot(np.array([np.linalg.norm(p) for p in npr.rand(1000,32)]))


Out[633]:
<matplotlib.axes._subplots.AxesSubplot at 0x169913c18>

In [603]:
pl.plot(np.interp(np.arange(-10,10,0.1),npr.randn(10)*5,npr.randn(10)))


Out[603]:
[<matplotlib.lines.Line2D at 0x159f23ac8>]

In [616]:
npr.seed(0)
layer_one = (npr.randn(100)*5,npr.randn(100))
layer_two = (npr.randn(10)*5,npr.randn(10))

In [617]:
one_out = np.interp(np.arange(-10,10,0.1),*layer_one)
two_out = np.interp(np.arange(-10,10,0.1),*layer_two)
one_two_out = np.interp(one_out,*layer_two)

In [618]:
pl.plot(one_out)
pl.title('Layer 1')
pl.figure()
pl.plot(two_out)
pl.figure()
pl.plot(one_two_out)


Out[618]:
[<matplotlib.lines.Line2D at 0x14993c710>]

In [ ]: